#!/bin/bash
# wraps around commands to produce GPU kernel
# $1 = input ll name
# $2 = OpenCL/SPIR/HSAIL kernel file name
# $3 = (optional) --hsa will trigger HSAIL lowering
#      (optional) --spir will trigger SPIR lowering
#      (optional) --opencl will trigger OpenCL C lowering
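# $4 = (optional) --amdgpu-target=(GPU family name)
#                          selects AMDGPU target
#      (optional) --verbose enables GPU target lowering output
# Note: --amdgpu-target and --verbose are recognized at any argument position.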
#
# in case $3 is not set, then either SPIR or OpenCL C kernel
# will be emitted according to the capability of GPU

# enable bash debugging: KMDBSCRIPT=1 in the environment turns on `set -x`
# (defaults to 0 when unset or empty)
: "${KMDBSCRIPT:=0}"

# dump the LLVM bitcode: KMDUMPLLVM=1 makes the HSA path copy intermediate
# files to ./dump.* (defaults to 0 when unset or empty)
: "${KMDUMPLLVM:=0}"

# The expansion is quoted so that a value containing whitespace or glob
# characters cannot turn the test into a syntax error.
if [ "$KMDBSCRIPT" == "1" ]; then
  set -x
fi

# check command line arguments: at least the input and output names are
# required; print the usage text on stderr and fail otherwise
if [ "$#" -lt 2 ]; then
  echo "Usage: $0 input_LLVM output_kernel (--hsa | --spir | --opencl) (--amdgpu-target=(GPU family name) (--verbose)" >&2
  echo "  --hsa    will trigger HSAIL/BRIG kernel output" >&2
  echo "  --spir   will trigger SPIR       kernel output" >&2
  echo "  --opencl will trigger OpenCL C   kernel output" >&2
  echo "  --amdgpu-target=(GPU family name)" >&2
  echo "           selects AMDGPU target" >&2
  echo "  --verbose will enable GPU target lowering output" >&2
  exit 1
fi

# The input must be an existing regular file.  "$1" is quoted so that a path
# containing spaces (or an empty argument) is tested as a single word instead
# of being split into several operands of `[`.
if [ ! -f "$1" ]; then
  echo "input LLVM IR $1 is not valid" >&2
  exit 1
fi

# Locate the helper tools and libraries relative to the directory this script
# was invoked from.  "$0" is quoted so an install path containing whitespace
# is passed to dirname as one argument.
BINDIR=$(dirname -- "$0")
EMBED=$BINDIR/clamp-embed
AS=$BINDIR/llvm-as
OPT=$BINDIR/opt
LLC=$BINDIR/llc
LINK=$BINDIR/llvm-link
# math bitcode / OpenCL sources (hsa_math.bc, opencl_math.bc, opencl_math.cl, ...)
MATHLIB=$BINDIR/../../lib
# loadable opt pass plugins (LLVMPromote, LLVMEraseNonkernel, ...)
LIB=$BINDIR/../lib
HSATOOLS=$BINDIR/clamp-hsatools

################
# Verbose flag
################

VERBOSE=0

################
# AMDGPU target
################

AMDGPU_TARGET=@AMDGPU_TARGET@

#######################################
# Scan the command line for --verbose and --amdgpu-target=<name>.
# Every argument is inspected, so the flags may appear at any position.
# Arguments are iterated as "$@" so that each one is seen exactly as given
# (no word splitting, no glob expansion, empty arguments preserved).
# Globals:   VERBOSE (set to 1 when --verbose is present)
#            AMDGPU_TARGET (set to the text after the first '=' of the
#                           last --amdgpu-target=... argument)
# Arguments: the script's command line arguments
#######################################
parse_lowering_flags() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --verbose)
        VERBOSE=1
        ;;
      --amdgpu-target=*)
        AMDGPU_TARGET="${arg#*=}"
        ;;
    esac
  done
}

parse_lowering_flags "$@"

# Build-time switch substituted by the build system.
HSA_USE_AMDGPU_BACKEND=@HSA_USE_AMDGPU_BACKEND@

# When the AMDGPU backend was enabled at build time, default KM_USE_AMDGPU
# to 1 unless the environment already provides a non-empty value.
# The expansion is quoted so an empty substitution compares as "" instead of
# making `[` fail with "unary operator expected".
if [ "$HSA_USE_AMDGPU_BACKEND" == "ON" ]; then
  : "${KM_USE_AMDGPU:=1}"
fi

# emit HSAIL/BRIG kernel

#######################################
# Lower an LLVM module to an HSAIL/BRIG kernel.
# Stages: opt (erase non-kernels / promote passes) -> optional special
# section removal -> textual sed rewrites -> llvm-as -> llvm-link against
# hsa_math.bc -> clamp-hsatools.
# Globals:   OPT, AS, LINK, HSATOOLS, LIB, MATHLIB (read: tool/library paths)
#            VERBOSE, KMDUMPLLVM, KM_USE_AMDGPU, AMDGPU_TARGET (read)
#            CLAMP_NOTILECHECK, ALWAYS_MALLOC (read, environment)
# Arguments: $1 - input LLVM file
#            $2 - output kernel file; $2.promote.* are scratch files
# Outputs:   on success $2 holds the file produced as $2.brig and the linked
#            bitcode is kept as $2.orig
# Returns:   1 if opt, llvm-as or llvm-link fails, otherwise the exit status
#            of $HSATOOLS
#######################################
emit_hsa_kernel() {
    local input=$1 output=$2
    local promoted_orig=$output.promote.ll.orig
    local promoted_ll=$output.promote.ll
    local promoted_bc=$output.promote.bc
    local status
    local -a loads passes tool_args

    if [ "$KMDUMPLLVM" == "1" ]; then
      cp "$input" ./dump.input.bc
    fi

    # Assemble the opt command line once; the tile-uniform check and the
    # always-malloc pass are the only parts that vary.
    loads=(-load "$LIB/LLVMPromote@CMAKE_SHARED_LIBRARY_SUFFIX@"
           -load "$LIB/LLVMEraseNonkernel@CMAKE_SHARED_LIBRARY_SUFFIX@")
    passes=(-erase-nonkernels -promote-globals -promote-privates)
    if [ "$CLAMP_NOTILECHECK" != "ON" ]; then
      loads+=(-load "$LIB/LLVMTileUniform@CMAKE_SHARED_LIBRARY_SUFFIX@")
      passes+=(-tile-uniform)
    fi
    passes+=(-malloc-select)
    if [ "$ALWAYS_MALLOC" == "ON" ]; then
      passes+=(-always-malloc)
    fi
    passes+=(-dce -globaldce)

    # Any non-zero status is a failure (a crashing opt does not exit with 1).
    if ! "$OPT" "${loads[@]}" "${passes[@]}" -S < "$input" -o "$promoted_orig"; then
      echo "Generating HSAIL BRIG kernel failed" >&2
      return 1
    fi

    # remove special section information for AMDGPU backend
    # (runs whenever KM_USE_AMDGPU is non-empty; best effort: on failure the
    # unmodified module is used)
    if [ -n "$KM_USE_AMDGPU" ]; then
      if "$OPT" -load "$LIB/LLVMRemoveSpecialSection@CMAKE_SHARED_LIBRARY_SUFFIX@" \
                -remove-special-section -S < "$promoted_orig" -o "$promoted_orig.new"; then
        mv -f "$promoted_orig.new" "$promoted_orig"
      fi
    fi

    # textual rewrites: mark calls as spir_func, turn addrspacecast into bitcast
    sed -e "s/call /call spir_func /g" \
        -e "s/addrspacecast /bitcast /g" < "$promoted_orig" > "$promoted_ll"

    if [ "$KMDUMPLLVM" == "1" ]; then
      cp "$promoted_ll" ./dump.promote.ll
    fi

    if ! "$AS" -o "$promoted_bc" "$promoted_ll"; then
      echo "Generating HSAIL BRIG kernel failed" >&2
      return 1
    fi

    if [ "$VERBOSE" == 1 ]; then
      echo "Generating HSA Brig kernel"
    fi
    # llvm-link diagnostics are discarded, so its status is the only signal.
    if ! "$LINK" "$MATHLIB/hsa_math.bc" "$promoted_bc" -o "$output" 2>/dev/null; then
      echo "Generating HSAIL BRIG kernel failed" >&2
      return 1
    fi

    if [ "$KMDUMPLLVM" == "1" ]; then
      cp "$output" ./dump.hsa_math_linked.bc
    fi

    tool_args=("$output")
    if [ "$AMDGPU_TARGET" != "" ]; then
      tool_args+=("--amdgpu-target=$AMDGPU_TARGET")
    fi
    "$HSATOOLS" "${tool_args[@]}"
    status=$?

    if [ "$status" == 0 ]; then
      mv -f "$output" "$output.orig"
      mv "$output.brig" "$output"
      # remove temp file
      rm "$promoted_orig" "$promoted_ll" "$promoted_bc"
    fi
    return "$status"
}

if [ "$3" == "--hsa" ]; then
    emit_hsa_kernel "$1" "$2"
    exit $?
fi

# emit SPIR kernel

#######################################
# Lower an LLVM module to an OpenCL SPIR kernel.
# Globals:   OPT, LINK, LIB, MATHLIB, VERBOSE (read)
#            CLAMP_NOTILECHECK, HCC_DIVPRECISE_PATCH (read, environment)
# Arguments: $1 - input LLVM file
#            $2 - output kernel file; $2.promote.bc is a scratch file
# Returns:   1 if an opt stage fails, otherwise the exit status of llvm-link
#######################################
emit_spir_kernel() {
    local input=$1 output=$2
    local promoted=$output.promote.bc
    local status
    local -a loads passes

    loads=(-load "$LIB/LLVMPromote@CMAKE_SHARED_LIBRARY_SUFFIX@"
           -load "$LIB/LLVMEraseNonkernel@CMAKE_SHARED_LIBRARY_SUFFIX@")
    passes=(-erase-nonkernels -promote-globals)
    if [ "$CLAMP_NOTILECHECK" != "ON" ]; then
      loads+=(-load "$LIB/LLVMTileUniform@CMAKE_SHARED_LIBRARY_SUFFIX@")
      passes+=(-tile-uniform)
    fi
    passes+=(-dce -globaldce)

    # Check opt directly: testing $? after a later `if ... fi` would observe
    # the status of that compound command, not of opt.
    if ! "$OPT" "${loads[@]}" "${passes[@]}" < "$input" -o "$promoted"; then
      echo "Generating OpenCL SPIR kernel failed" >&2
      return 1
    fi

    # convert fdiv float instructions to builtin calls
    # only apply this patch when HCC_DIVPRECISE_PATCH environment variable set to ON
    # NOTE(review): this plugin name hard-codes ".so" while the other plugins
    # use the build-time shared library suffix -- confirm whether intentional.
    if [ "$HCC_DIVPRECISE_PATCH" == "ON" ]; then
      if ! { "$OPT" -load "$LIB/LLVMDivisionPrecision.so" \
                    -divprecise < "$promoted" -o "$promoted.new" \
             && mv "$promoted.new" "$promoted"; }; then
        echo "Generating OpenCL SPIR kernel failed" >&2
        return 1
      fi
    fi

    if [ "$VERBOSE" == 1 ]; then
      echo "Generating OpenCL SPIR kernel"
    fi
    "$LINK" "$MATHLIB/opencl_math.bc" "$promoted" -o "$output" 2>/dev/null
    status=$?

    # remove temp file; the link status (not rm's) is what callers need
    rm "$promoted"
    return "$status"
}

if [ "$3" == "--spir" ]; then
    emit_spir_kernel "$1" "$2"
    exit $?
fi

# emit OpenCL C kernel

#######################################
# Lower an LLVM module to an OpenCL C kernel.
# The llc output (C backend) is appended to opencl_math.cl; when the result
# contains " double ", opencl_prefix.cl is additionally prepended.
# Globals:   OPT, LLC, LIB, MATHLIB, VERBOSE (read)
#            CLAMP_NOTILECHECK (read, environment)
# Arguments: $1 - input LLVM file
#            $2 - output kernel file; $2.promote.bc and $2.t are scratch files
# Returns:   1 if opt fails or the prefix step fails, the failing stage's
#            status if the llc | cat pipeline fails, 0 otherwise
#######################################
emit_opencl_c_kernel() {
    local input=$1 output=$2
    local promoted=$output.promote.bc
    local status=0
    local -a loads passes stage_rc

    loads=(-load "$LIB/LLVMPromote@CMAKE_SHARED_LIBRARY_SUFFIX@"
           -load "$LIB/LLVMEraseNonkernel@CMAKE_SHARED_LIBRARY_SUFFIX@")
    passes=(-erase-nonkernels -promote-globals)
    if [ "$CLAMP_NOTILECHECK" != "ON" ]; then
      loads+=(-load "$LIB/LLVMTileUniform@CMAKE_SHARED_LIBRARY_SUFFIX@")
      passes+=(-tile-uniform)
    fi
    passes+=(-dce -globaldce)

    # Any non-zero status is a failure (a crashing opt does not exit with 1).
    if ! "$OPT" "${loads[@]}" "${passes[@]}" < "$input" -o "$promoted"; then
      echo "Generating OpenCL C kernel failed" >&2
      return 1
    fi
    if [ "$VERBOSE" == 1 ]; then
      echo "Generating OpenCL C kernel"
    fi

    "$LLC" "$promoted" -march=c -o - | cat "$MATHLIB/opencl_math.cl" - > "$output"
    # A pipeline reports only its last stage; keep every stage's status so an
    # llc failure is not masked by a successful cat.
    stage_rc=("${PIPESTATUS[@]}")
    if [ "${stage_rc[0]}" -ne 0 ]; then
      status=${stage_rc[0]}
    elif [ "${stage_rc[1]}" -ne 0 ]; then
      status=${stage_rc[1]}
    fi

    if [ "$status" -eq 0 ] && grep -q " double " "$output"; then
        if ! { cat "$MATHLIB/opencl_prefix.cl" "$output" > "$output.t" \
               && mv -f "$output.t" "$output"; }; then
          status=1
        fi
    fi

    # remove temp file; report the generation status rather than rm's
    rm "$promoted"
    return "$status"
}

if [ "$3" == "--opencl" ]; then
    emit_opencl_c_kernel "$1" "$2"
    exit $?
fi


# Report whether SPIR output should be used.
# Succeeds only when CLAMP_NOSPIR is empty/unset, /usr/bin/clinfo is
# executable, and its output mentions cl_khr_spir.
# Returns: 0 when SPIR is available, non-zero otherwise.
hasSPIR() {
    # explicit opt-out through the environment
    if [ -n "$CLAMP_NOSPIR" ]; then
        return 1
    fi
    # without clinfo the capability cannot be probed
    if [ ! -x /usr/bin/clinfo ]; then
        return 1
    fi
    # the status of grep (last pipeline stage) is the function's result
    /usr/bin/clinfo | grep cl_khr_spir > /dev/null
}
# if there is no mode specified, fallback to default behavior:
# run the promote passes, then emit a SPIR kernel when hasSPIR succeeds and
# an OpenCL C kernel otherwise.  The script exits with the status of the
# kernel generation step (1 if opt fails).
DEFAULT_OPT_LOADS=(-load "$LIB/LLVMPromote@CMAKE_SHARED_LIBRARY_SUFFIX@"
                   -load "$LIB/LLVMEraseNonkernel@CMAKE_SHARED_LIBRARY_SUFFIX@")
DEFAULT_OPT_PASSES=(-erase-nonkernels -promote-globals)
if [ "$CLAMP_NOTILECHECK" != "ON" ]; then
  DEFAULT_OPT_LOADS+=(-load "$LIB/LLVMTileUniform@CMAKE_SHARED_LIBRARY_SUFFIX@")
  DEFAULT_OPT_PASSES+=(-tile-uniform)
fi
DEFAULT_OPT_PASSES+=(-dce -globaldce)

# Any non-zero status is a failure (a crashing opt does not exit with 1).
if ! "$OPT" "${DEFAULT_OPT_LOADS[@]}" "${DEFAULT_OPT_PASSES[@]}" < "$1" -o "$2.promote.bc"; then
  echo "Generating OpenCL kernel failed" >&2
  exit 1
fi

RETVAL=0
if hasSPIR; then
    if [ "$VERBOSE" == 1 ]; then
      echo "Generating OpenCL SPIR kernel"
    fi
    "$LINK" "$MATHLIB/opencl_math.bc" "$2.promote.bc" -o "$2" 2>/dev/null
    RETVAL=$?
else
    if [ "$VERBOSE" == 1 ]; then
      echo "Generating OpenCL C kernel"
    fi
    "$LLC" "$2.promote.bc" -march=c -o - | cat "$MATHLIB/opencl_math.cl" - > "$2"
    # A pipeline reports only its last stage; keep every stage's status so an
    # llc failure is not masked by a successful cat.
    DEFAULT_STAGE_RC=("${PIPESTATUS[@]}")
    if [ "${DEFAULT_STAGE_RC[0]}" -ne 0 ]; then
      RETVAL=${DEFAULT_STAGE_RC[0]}
    elif [ "${DEFAULT_STAGE_RC[1]}" -ne 0 ]; then
      RETVAL=${DEFAULT_STAGE_RC[1]}
    fi

    if [ "$RETVAL" -eq 0 ] && grep -q " double " "$2"; then
        if ! { cat "$MATHLIB/opencl_prefix.cl" "$2" > "$2.t" \
               && mv -f "$2.t" "$2"; }; then
          RETVAL=1
        fi
    fi
fi

# remove temp file; exit with the generation status rather than rm's
rm "$2.promote.bc"
exit "$RETVAL"
